knitr::opts_knit$set(root.dir = "/Users/amitmeir/Documents/rglab/flowReMix")
Loading Data
library(flowReMix)
library(ggplot2)
library(dplyr)
library(reshape2)
library(cowplot)
assign <- function(x) {
x$prop <- x$count / x$parentcount
assign <- as.numeric(by(x, x$subset, function(y) y$prop[1] > y$prop[2]))
assign[assign == 1] <- -1
result <- data.frame(ptid = x$ptid[1], subset = unique(x$subset), assign = assign)
return(result)
}
require(pROC)
require(reshape2)
data("rv144_booleans")
bySubset <- by(data.frame(booleans$stim, booleans$nonstim), booleans$Subset, function(x) x)
largerThanThershold <- sapply(bySubset, function(x) colSums(x >5))
booldata <- melt(booleans, c("PTID", "Subset"))
names(booldata)[3:4] <- c("stim", "count")
booldata <- by(booldata, INDICES = list(booldata$PTID, booldata$stim), function(x) {
x$parentcount <- sum(x$count)
return(x)
})
booldata <- do.call("rbind", booldata)
booldata <- subset(booldata, Subset != "!TNFa&!IFNg&!IL4&!IL2&!CD154&!IL17a")
booldata$treatment <- as.numeric(booldata$stim == "stim")
uniquepop <- unique(booldata$Subset)
booldata <- with(booldata, booldata[order(Subset, PTID, stim, decreasing = FALSE), ])
booldata <- subset(booldata, !is.na(Subset))
allsubset <- booldata
booldata <- with(booldata, booldata[order(Subset, PTID, stim, decreasing = FALSE), ])
# Naming ------------------
subsets <- unique(booldata$Subset)
booldata$Subset <- as.character(booldata$Subset)
nfunctions <- numeric(length(subsets))
for(i in 1:length(subsets)) {
split <- strsplit(as.character(subsets[i]), "&")[[1]]
first <- substr(split, 1, 1)
nfunction <- sum(first != "!")
nfunctions[i] <- nfunction
name <- paste(split[first != "!"], collapse = ",")
booldata$nfunction[booldata$Subset == subsets[[i]]] <- nfunction
booldata$Subset[booldata$Subset == subsets[[i]]] <- name
}
subsets <- unique(booldata$Subset)
booldata <- with(booldata, booldata[order(Subset, PTID, stim, decreasing = FALSE), ])
names(booldata) <- tolower(names(booldata))
# Getting vaccine information --------------------
data("rv144")
rv144 <- rv144[order(rv144$ptid), ]
vaccine <- (by(rv144, rv144$ptid, function(x) x$vaccine[1] == "VACCINE"))
vaccine <- data.frame(ptid = names(vaccine), vaccine = as.numeric(vaccine))
vaccinemat <- vaccine[vaccine$ptid %in% booldata$ptid, ]
# Getting infection status
data("rv144_correlates_data")
correlates <- rv144_correlates_data
correlates <- correlates[order(as.character(correlates$PTID)), ]
infection <- correlates$infect.y
# Converting low counts to booleans --------------
countByPop <- by(booldata, booldata$subset, function(x) max(x$count / x$parentcount) < 10^-3 / 2)
countByPop <- by(booldata, booldata$subset, function(x) {
if(max(x$count / x$parentcount) < 10^-3 / 2) {
x$count <- as.numeric(x$count > 0)
x$parentcount <- 1
}
return(x)
})
# booldata <- do.call("rbind", countByPop)
Data Analysis
library(flowReMix)
control <- flowReMix_control(updateLag = 25, nsamp = 100, initMHcoef = 2.5,
nPosteriors = 1, centerCovariance = TRUE,
maxDispersion = 10^3, minDispersion = 10^7,
randomAssignProb = 10^-8, intSampSize = 50,
lastSample = 100, isingInit = -log(99),
initMethod = "robust")
booldata$subset <- factor(booldata$subset)
preAssignment <- do.call("rbind", by(booldata, booldata$ptid, assign))
system.time(fit <- flowReMix(cbind(count, parentcount - count) ~ treatment,
subject_id = ptid,
cell_type = subset,
cluster_variable = treatment,
data = booldata,
covariance = "sparse",
ising_model = "sparse",
regression_method = "robust",
iterations = 40,
# cluster_assignment = preAssignment,
parallel = TRUE,
verbose = TRUE, control = control))
Loading Results
load(file = "data analysis/results/boolean robust15.Robj")
Scatter Plots
ids <- fit$posteriors[, 1:2]
vaccine[, 1] <- as.character(vaccine[, 1])
vaccine[, 1] <- factor(vaccine[, 1], levels = levels(ids[, 1]))
vaccine <- vaccine[!is.na(vaccine[, 1]), ]
vaccine <- vaccine[order(vaccine[, 1]), ]
ids <- merge(ids, vaccine, all.x = TRUE, all.y = FALSE,
by = "ptid", sort = FALSE)
vaccination <- ids[, 3]
scatter <- plot(fit, target = vaccination, type = "scatter",
ncol = 4)
scatter + theme_bw()
ROC plot
rocplot <- plot(fit, target = vaccination, type = "ROC", ncols = 5,
direction = "auto", thresholdPalette = NULL,
subsets = NULL)
rocplot
FDR Curves
plot(fit, target = vaccination, type = "FDR")
ROC table
rocResults <- rocTable(fit, vaccination, direction = ">", adjust = "BH",
sortAUC = FALSE)
rocResults[order(rocResults$auc, decreasing = TRUE), ]
Some Functionality Analysis
# Getting infection status
infectDat <- data.frame(ptid = rv144_correlates_data$PTID, infect = rv144_correlates_data$infect.y)
datId <- as.character(fit$posteriors$ptid)
infectID <- as.character(infectDat$ptid)
infectDat <- infectDat[infectID %in% datId, ]
infectDat$ptid <- factor(as.character(infectDat$ptid), levels = levels(booldata$ptid))
infectDat <- infectDat[order(infectDat$ptid), ]
ids <- merge(ids, infectDat, sort = FALSE)
infect <- ids[, 4]
infect[infect == "PLACEBO"] <- NA
# Computing Functionality Score
func <- rowSums(fit$posteriors[, -1])
funcAUC <- roc(infect ~ func)$auc
n0 <- sum(infect == "INFECTED", na.rm = TRUE)
n1 <- sum(infect == "NON-INFECTED", na.rm = TRUE)
print("AUC for detecting infection based on functionality score.")
[1] "AUC for detecting infection based on functionality score."
funcAUC
Area under the curve: 0.6126
pwilcox(funcAUC * n0 * n1, n0, n1, lower.tail = FALSE)
[1] 0.01412133
# Computing polyfunctionality score
nfunctions <- sapply(subsets, function(x) length(gregexpr(",", paste(",", x))[[1]]))
M <- 6
weights <- nfunctions / (choose(M, nfunctions))
poly <- apply(fit$posteriors[, -1], 1, function(x) weighted.mean(x, weights))
# poly <- apply(fit$posteriors[, -1], 1, function(x) median(weights * x))
polyAUC <- roc(infect ~ poly)$auc
n0 <- sum(infect == "INFECTED", na.rm = TRUE)
n1 <- sum(infect == "NON-INFECTED", na.rm = TRUE)
print("AUC for detecting infection based on polyfunctionality score.")
[1] "AUC for detecting infection based on polyfunctionality score."
polyAUC
Area under the curve: 0.6221
pwilcox(polyAUC * n0 * n1, n0, n1, lower.tail = FALSE)
[1] 0.008605355
print("AUCs for dicriminating between Vaccinees and Placebos based on functionality and polyfunctionality scores.")
[1] "AUCs for dicriminating between Vaccinees and Placebos based on functionality and polyfunctionality scores."
roc(vaccination ~ func)
Call:
roc.formula(formula = vaccination ~ func)
Data: func in 36 controls (vaccination 0) < 226 cases (vaccination 1).
Area under the curve: 0.9929
roc(vaccination ~ poly)
Call:
roc.formula(formula = vaccination ~ poly)
Data: poly in 36 controls (vaccination 0) < 226 cases (vaccination 1).
Area under the curve: 0.9931
Polyfunctionality Boxplots
hiv <- infect
nfunctions <- sapply(names(fit$posteriors)[-1], function(x) length(strsplit(x, ",", fixed = TRUE)[[1]]))
weights <- list()
weights$FS <- rep(1, ncol(fit$posteriors) - 1)
M <- 6
weights$PFS <- nfunctions / choose(6, nfunctions)
hiv[is.na(hiv)] <- "PLACEBO"
plot(fit, target = hiv, type = "boxplot", groups = "all", weights = weights)
Logistic Regressions
group <- c(24, 21, 15, 8)
score <- rowMeans(fit$posteriors[, group])
rocfit <- roc(infect ~ score)
pwilcox(rocfit$auc * n0 * n1, n0, n1, lower.tail = FALSE)
[1] 0.005360221
ids$groupscore <- score
ids$poly <- poly
ids$func <- func
vaccines <- subset(correlates, infect.y != "PLACEBO")
vaccines$PTID <- as.character(vaccines$PTID)
ids$ptid <- as.character(ids$ptid)
vaccines <- merge(vaccines, ids, all.x = TRUE, all.y = TRUE,
by.x = "PTID", by.y = "ptid")
vaccines <- subset(vaccines, vaccines$infect != "PLACEBO")
plot(vaccines$PFS, vaccines$poly, main = "COMPASS polyfunctionality vs. new polyfunctionality")
lines(lowess(vaccines$PFS, vaccines$poly), col = "red", lwd = 2)
abline(v = c(0.05, .085))
abline(h = c(0.35))
target <- vaccines$poly > 0.35 & vaccines$PFS > 0.05 & vaccines$PFS < 0.085
summary(glm(infect ~ func + IgAprim + risk.medium + risk.high + sex,
family = "binomial",
data = vaccines))
Call:
glm(formula = infect ~ func + IgAprim + risk.medium + risk.high +
sex, family = "binomial", data = vaccines)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.2258 -0.6466 -0.5075 -0.3969 2.4050
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.087e-03 1.024e+00 -0.006 0.99526
func -1.702e-01 9.385e-02 -1.813 0.06979 .
IgAprim 4.862e-01 1.818e-01 2.674 0.00749 **
risk.medium -6.699e-05 4.708e-01 0.000 0.99989
risk.high 6.324e-01 4.266e-01 1.482 0.13827
sex -4.352e-02 3.863e-01 -0.113 0.91031
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 204.72 on 225 degrees of freedom
Residual deviance: 192.57 on 220 degrees of freedom
AIC: 204.57
Number of Fisher Scoring iterations: 4
summary(glm(infect ~ poly + IgAprim + risk.medium + risk.high + sex,
family = "binomial",
data = vaccines))
Call:
glm(formula = infect ~ poly + IgAprim + risk.medium + risk.high +
sex, family = "binomial", data = vaccines)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.2498 -0.6299 -0.5132 -0.3850 2.4341
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.381070 1.101848 0.346 0.72946
poly -4.224542 2.079878 -2.031 0.04224 *
IgAprim 0.477983 0.180938 2.642 0.00825 **
risk.medium -0.001307 0.471387 -0.003 0.99779
risk.high 0.627893 0.427398 1.469 0.14180
sex -0.044041 0.387241 -0.114 0.90945
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 204.72 on 225 degrees of freedom
Residual deviance: 191.68 on 220 degrees of freedom
AIC: 203.68
Number of Fisher Scoring iterations: 4
summary(glm(infect ~ groupscore + IgAprim + risk.medium + risk.high + sex,
family = "binomial",
data = vaccines))
Call:
glm(formula = infect ~ groupscore + IgAprim + risk.medium + risk.high +
sex, family = "binomial", data = vaccines)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.2799 -0.6283 -0.5071 -0.3711 2.5203
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.30977 0.70412 -0.440 0.65998
groupscore -2.45746 1.06876 -2.299 0.02148 *
IgAprim 0.47807 0.18117 2.639 0.00832 **
risk.medium 0.01706 0.47256 0.036 0.97120
risk.high 0.61467 0.42860 1.434 0.15153
sex -0.04239 0.38871 -0.109 0.91316
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 204.72 on 225 degrees of freedom
Residual deviance: 190.15 on 220 degrees of freedom
AIC: 202.15
Number of Fisher Scoring iterations: 5
Logistic Regressions for Single Subsets
vaccines <- subset(correlates, infect.y != "PLACEBO")
resultList <- list()
adjRocList <- list()
for(i in 1:(ncol(fit$posteriors) - 1)) {
vaccines$post <- NULL
post <- fit$posteriors[!is.na(infect), c(1, i + 1)]
names(post)[2] <- "post"
vaccines <- merge(vaccines, post, by.x = "PTID", by.y = "ptid", all.x = TRUE)
resultList[[i]] <- summary(glm(infect.y ~ post + IgAprim + agecat + risk.medium + risk.high + sex,
family = "binomial",
data = vaccines))
resid <- lm(post ~ IgAprim + agecat + risk.medium + risk.high + sex,
data = vaccines)$residuals
infectResid <- glm(infect.y ~ IgAprim + agecat + risk.medium + risk.high + sex,
family = "binomial", data = vaccines)$residuals
adjRocList[[i]] <- roc(vaccines$infect.y ~ resid)
}
names(resultList) <- colnames(fit$posteriors)[-1]
names(adjRocList) <- colnames(fit$posteriors)[-1]
regResult <- t(sapply(resultList, function(x) x$coefficient[2, c(1,4)]))
regResult <- data.frame(regResult)
regResult$auc <- sapply(adjRocList, function(x) x$auc)
regResult$aucPval <- pwilcox(regResult$auc * n0 * n1, n0, n1, lower.tail = FALSE)
regResult$aucQval <- p.adjust(regResult$aucPval, method = "BH")
regResult[order(regResult[, 2], decreasing = FALSE), ]
Dependence Structure for Random Effects for Varying Threshold
# randStability <- stabilityGraph(fit, type = "randomEffects", cpus = 2, reps = 100,
# cv = TRUE)
# save(randStability, file = "data analysis/results/RV144rand15.Robj")
load("data analysis/results/RV144rand15.Robj")
for(threshold in c(0.5, 0.75, 0.85, 0.9, 0.95, 1)) {
randplot <- plot(randStability, fill = rocResults$auc, threshold = threshold)
print(randplot)
}
Loading required package: network
network: Classes for Relational Data
Version 1.13.0 created on 2015-08-31.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
Mark S. Handcock, University of California -- Los Angeles
David R. Hunter, Penn State University
Martina Morris, University of Washington
Skye Bender-deMoll, University of Washington
For citation information, type citation("network").
Type help("network-package") to get started.
Loading required package: sna
Loading required package: statnet.common
sna: Tools for Social Network Analysis
Version 2.4 created on 2016-07-23.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
For citation information, type citation("sna").
Type help(package="sna") to get started.
Loading required package: scales
Ising Network for Varying Threshold
# stability <- stabilityGraph(fit, type = "ising", cpus = 2, reps = 100)
# save(stability, file = "data analysis/results/RV144ising15.Robj")
load("data analysis/results/RV144ising15.Robj")
for(threshold in c(0.5, 0.75, 0.85, 0.9, 0.95, 1)) {
isingplot <- plot(stability, fill = rocResults$auc, threshold = threshold)
print(isingplot)
}